{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text Distances\n",
    "\n",
    "\n",
    "This notebook illustrates how to use the Levenstein distance (edit distance) in TensorFlow.\n",
    "\n",
    "Get required libarary and start tensorflow session."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "sess = tf.Session()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### First compute the edit distance between 'bear' and 'beers'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 2.]]\n"
     ]
    }
   ],
   "source": [
    "hypothesis = list('bear')\n",
    "truth = list('beers')\n",
    "h1 = tf.SparseTensor([[0,0,0], [0,0,1], [0,0,2], [0,0,3]],\n",
    "                     hypothesis,\n",
    "                     [1,1,1])\n",
    "\n",
    "t1 = tf.SparseTensor([[0,0,0], [0,0,1], [0,0,2], [0,0,3],[0,0,4]],\n",
    "                     truth,\n",
    "                     [1,1,1])\n",
    "\n",
    "print(sess.run(tf.edit_distance(h1, t1, normalize=False)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute the edit distance between ('bear','beer') and 'beers':"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.40000001  0.2       ]]\n"
     ]
    }
   ],
   "source": [
    "hypothesis2 = list('bearbeer')\n",
    "truth2 = list('beersbeers')\n",
    "h2 = tf.SparseTensor([[0,0,0], [0,0,1], [0,0,2], [0,0,3], [0,1,0], [0,1,1], [0,1,2], [0,1,3]],\n",
    "                     hypothesis2,\n",
    "                     [1,2,4])\n",
    "\n",
    "t2 = tf.SparseTensor([[0,0,0], [0,0,1], [0,0,2], [0,0,3], [0,0,4], [0,1,0], [0,1,1], [0,1,2], [0,1,3], [0,1,4]],\n",
    "                     truth2,\n",
    "                     [1,2,5])\n",
    "\n",
    "print(sess.run(tf.edit_distance(h2, t2, normalize=True)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now compute distance between four words and 'beers' more efficiently:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.40000001]\n",
      " [ 0.60000002]\n",
      " [ 1.        ]\n",
      " [ 1.        ]]\n"
     ]
    }
   ],
   "source": [
    "hypothesis_words = ['bear','bar','tensor','flow']\n",
    "truth_word = ['beers']\n",
    "\n",
    "num_h_words = len(hypothesis_words)\n",
    "h_indices = [[xi, 0, yi] for xi,x in enumerate(hypothesis_words) for yi,y in enumerate(x)]\n",
    "h_chars = list(''.join(hypothesis_words))\n",
    "\n",
    "h3 = tf.SparseTensor(h_indices, h_chars, [num_h_words,1,1])\n",
    "\n",
    "truth_word_vec = truth_word*num_h_words\n",
    "t_indices = [[xi, 0, yi] for xi,x in enumerate(truth_word_vec) for yi,y in enumerate(x)]\n",
    "t_chars = list(''.join(truth_word_vec))\n",
    "\n",
    "t3 = tf.SparseTensor(t_indices, t_chars, [num_h_words,1,1])\n",
    "\n",
    "print(sess.run(tf.edit_distance(h3, t3, normalize=True)))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:tf-cpu]",
   "language": "python",
   "name": "conda-env-tf-cpu-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
